Read in carbonate chemistry data (via Ana P) and proportional time
calculations (via Thomas D)
CC = read.csv('/Users/heidi.k.hirsh/Desktop/FLK_data/CCflk_plusBathy.csv') #make sure this is the most appropriate file
# CC = read.csv('/Users/heidi.k.hirsh/Documents/GitHub/test-flk/CCflkplusnuts.csv') #read in the version that has nutrients (both spatial and direct match) this only has 14 days so maybe I should use a version without ndays
dim(CC)/7/2 # 613.000000 8.714286
## [1] 115.0714 4.5000
ptime = st_read('/Users/heidi.k.hirsh/Desktop/FLK_data/BowtieFractions_Jan10.shp')
## Reading layer `BowtieFractions_Jan10' from data source
## `/Users/heidi.k.hirsh/Desktop/FLK_data/BowtieFractions_Jan10.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 38500 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -81.83393 ymin: 24.39515 xmax: -80.04417 ymax: 25.6475
## Geodetic CRS: WGS 84
names(ptime)
## [1] "simu" "sampl_d" "ndays" "nrth_fr" "sth_frc" "geometry"
## Rename (why do names change with the original save?)
ptime=ptime %>% rename(sample_id=sampl_d,north_fraction=nrth_fr,south_fraction=sth_frc) #rename fraction columns
names(ptime)
## [1] "simu" "sample_id" "ndays" "north_fraction"
## [5] "south_fraction" "geometry"
## Reorder regions and zones to make sense for plotting
# CC$Sub_region = factor(CC$Sub_region, levels = c("BB","UK","MK","LK"))
CC$Sub_region = factor(CC$Sub_region, levels = c("LK","MK","UK","BB"))
CC$Zone = factor(CC$Zone, levels = c("Inshore","Mid channel","Offshore","Oceanic"))
## Assign each month by number (so months don't plot misleadingly in alphabetical order and years are comparable)
head(CC$UTCDate_Time)
## [1] "2010-03-08 12:26:00" "2010-03-08 13:05:00" "2010-03-08 17:17:00"
## [4] "2010-03-08 17:42:00" "2010-03-08 17:45:00" "2010-03-08 18:22:00"
CC$date= as.Date(CC$Date,format="%Y-%m-%d")
CC$month=format(CC$date, format="%B")
CC$month2=format(CC$date, format="%m")
CC$month2=as.numeric(CC$month2)
head(CC$month2)
## [1] 3 3 3 3 3 3
# above code is required if building off of CC (without nutrients)
## Define visit IDs for matching fractions with carbonate chemistry metadata
CC$visitID_ch1 = paste(CC$SiteID,CC$UTCDate_Time)
head(CC$visitID_ch1)
## [1] "1 2010-03-08 12:26:00" "3 2010-03-08 13:05:00"
## [3] "4 2010-03-08 17:17:00" "5 2010-03-08 17:42:00"
## [5] "5.5 2010-03-08 17:45:00" "6 2010-03-08 18:22:00"
CC$visitID_ch2 = gsub(" ", "_" , CC$visitID_ch1, perl=TRUE)
head(CC$visitID_ch2)
## [1] "1_2010-03-08_12:26:00" "3_2010-03-08_13:05:00"
## [3] "4_2010-03-08_17:17:00" "5_2010-03-08_17:42:00"
## [5] "5.5_2010-03-08_17:45:00" "6_2010-03-08_18:22:00"
CC$visitID = gsub("[: -]", "" , CC$visitID_ch2, perl=TRUE)
head(CC$visitID)
## [1] "1_20100308_122600" "3_20100308_130500" "4_20100308_171700"
## [4] "5_20100308_174200" "5.5_20100308_174500" "6_20100308_182200"
# CC already has this if it is the nutrients version
# CC$visitID=CC$visitID.x
# head(ptime)
# length(unique(CC$visitID)) #1610 #CC has more unique IDs. Why?
# length(unique(ptime$sample_id)) #1375
# dim(CC) #1611 69
# dim(ptime)/14/2 #multiplied by ndays and forward/backward ==> 1375, same as unique sample_id names
ptime$visitID = ptime$sample_id
# head(ptime)
## Merge data with left_join
CC_frac=NULL
CC_frac = left_join(ptime, CC, by="visitID")
dim(CC_frac)
## [1] 38500 75
#save CC_frac
write.csv(CC_frac, file='/Users/heidi.k.hirsh/Documents/GitHub/test-flk/CCfractions.csv')
names(CC_frac)
## [1] "simu" "sample_id" "ndays" "north_fraction"
## [5] "south_fraction" "visitID" "X.1" "X"
## [9] "CTDID" "Region" "Year" "Mission"
## [13] "Location" "Latitude" "Longitude" "UTCDate"
## [17] "UTCTime" "Sample_Depth_m" "DIC_umol_kg" "TA_umol_kg"
## [21] "pH_measured" "pH_calculated" "pCO2_uatm" "Aragonite_Sat_W"
## [25] "Salinity_Bottle" "Conductivity_Sm" "Salinity_CTD" "Temperature_C"
## [29] "Pressure_db" "Density_Sigmat" "SiteID" "Survey_design"
## [33] "Sample_frequency" "UTCDate_Time" "datetime" "ESTDate"
## [37] "ESTTime" "Month" "Months" "MY"
## [41] "MoY" "ToD" "Season" "Precipitation"
## [45] "dec.lon" "dec.lat" "Zone" "Sub_region"
## [49] "Reference" "Transect" "Extreme" "BestSalinity"
## [53] "n35TA" "n35DIC" "TA_0" "DIC_0"
## [57] "nTA" "nDIC" "Date" "TIMESTAMP_UTC"
## [61] "TIMESTAMP_LST" "jday.utc" "jday.lst" "hrod.lst"
## [65] "PAR_MODIS_DAILY" "PAR_MODIS_8DAY" "PAR_MODIS_MON" "pointDepth"
## [69] "resolution" "date" "month" "month2"
## [73] "visitID_ch1" "visitID_ch2" "geometry"
dim(CC_frac)/14/2
## [1] 1375.000000 2.678571
## exclude the forward bowtie data
CCb_frac=subset(CC_frac,simu=='backward') #19250 72
ploThis=CCb_frac
## Bar graph to compare spatial (facet by regions and zones) and temporal (xaxis = Month, facet by year) patterns in north fraction
#problems:
#does not separate out ndays and regions at the same time
#cannot distinguish no data versus 0/low fractions
#alphabetical months on xaxis = yikes
ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
geom_bar(position="dodge",stat="identity")+
facet_wrap(~ndays,ncol=2)+
theme_bw()

ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
geom_bar(position="dodge",stat="identity")+
facet_wrap(~Sub_region,ncol=2)+
theme_bw()

ggplot(data=ploThis, aes(fill=Zone, y=north_fraction, x=Month))+
geom_bar(position="dodge",stat="identity")+
facet_grid(Year ~ Sub_region)+
theme_bw()

ggplot(data=ploThis, aes(x=UTCDate_Time,y=north_fraction,color=as.factor(Year)))+
geom_point()+
facet_grid(Zone ~ Sub_region)+
theme_bw()

#yikes. no.
ggplot(data=ploThis, aes(x=month2,y=north_fraction,color=as.factor(ndays)))+
geom_point()+
facet_grid(Zone ~ Sub_region)+
theme_bw()

#problems:
#patterns are impossible
#decimal months
#ndays color scale is bad (lows and high number of days look the same)
Zones=c('Inshore', 'Mid channel', 'Offshore', 'Oceanic')
Regions=c('BB', 'UK', 'MK', 'LK')
Years=unique(CCb_frac$Year)
Ndays=unique(CCb_frac$ndays)
#loop through each year, plot, and save
#plot a line for each month showing change in fraction with ndays (xaxis)
y_i=1
# for (y_i in 1:length(Years)) {
plot_year= Years[y_i]
ploThis = subset(CCb_frac, Year==plot_year)
yearFrac = ggplot(data=ploThis, aes(y=north_fraction, x=ndays))+
# geom_point(aes(color=month2))+
geom_line(aes(color=month2,group=visitID))+
facet_grid(Zone ~ Sub_region)+
ggtitle(plot_year)+
theme_bw()
yearFrac

# ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_year,"_northFraction_month.png"),plot=yearFrac,width = 10, height = 8, dpi = 300)
# }
#problems:
#number of months is not consistent so impossible to compare between plots
#not intuitive for ndays to be xaxis
#loop through regions to plot north fraction by month (for selected nday value)
#I don't know why I thought this would be helpful
#also decimal months again
#impossible to distinguish blues for years
ploThis=subset(CCb_frac,ndays==3)
r_i=3
# for (r_i in 1:length(Regions)) {
plot_region= Regions[r_i]
ploThis = subset(ploThis, Sub_region==plot_region)
regFrac = ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
geom_point(aes(color=Year))+
geom_line(aes(color=Year,group=interaction(SiteID,Year)))+
facet_grid(Zone ~ Sub_region)+
ggtitle(plot_region)+
theme_bw()
regFrac

# ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_year,"_northFraction_month.png"),plot=yearFrac,width = 10, height = 8, dpi = 300)
# }
ploThis=subset(CCb_frac,ndays==3)
ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
geom_point(aes(color=Year))+
geom_line(aes(color=Year,group=interaction(SiteID,Year)))+
facet_grid(Zone ~ Sub_region)+
theme_bw()

#this is just ndays=3.
##Loop through and plot annual (monthly mean) lines for each ndays value
#loop through and plot annual lines for each number of days
pal <- wes_palette("Zissou1", n=9,type = "continuous") #cannot make a discrete palette with more than 5? colors
pal

unique(CCb_frac$Year )
## [1] 2012 2014 2015 2016 2017 2018 2019 2020 2021
#loop through ndays to compare annual trends in space and time
i=1
# for (i in 1:length(Ndays)) {
plot_n= Ndays[i]
ploThis = subset(CCb_frac, ndays==plot_n)
nFrac = ggplot(data=ploThis, aes(y=north_fraction, x=month2))+
geom_point(aes(color=Year))+
geom_line(aes(color=Year,group=interaction(SiteID,Year)))+
# scale_colour_continuous(colours = pal,breaks=seq(2012,2021,1)) + #trying to make a continuous color palette discrete (didn't work)
scale_colour_gradientn(colours = pal) +
facet_grid(Zone ~ Sub_region)+
ggtitle(paste('ndays =',plot_n))+
scale_x_continuous(breaks=c(1:12,1))+
theme_bw()
nFrac

# ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/",plot_n,"_northFraction_ndays_zis.png"),plot=nFrac,width = 10, height = 8, dpi = 300)
# }
## Try violin plot (thank you Michael)
i=1
# for (i in 1:length(Ndays)) {
plot_n= Ndays[i]
ploThis = subset(CCb_frac, ndays==plot_n)
nFrac = ggplot(data=ploThis, aes(x=month2,y=north_fraction))+
geom_violin(aes(group=month2))+
stat_summary(fun.y=mean, geom="line", color='red', size=1)+
stat_summary(fun.y=mean, geom="point", color='black', size=2)+
# geom_boxplot(aes(group=month2),width=0.1)+ #puts boxplot inside the violin (too busy)
facet_grid(Zone ~ Sub_region)+
ggtitle(paste('ndays =',plot_n))+
scale_x_continuous(breaks=c(1:12,1))+
theme_bw()
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
nFrac
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Groups with fewer than two data points have been dropped.
## Groups with fewer than two data points have been dropped.

# ggsave(filename=paste0("/Users/heidi.k.hirsh/Desktop/FractionsExplore/yearViolins2/",plot_n,"_northFraction_ndaysViolin.png"),plot=nFrac,width = 10, height = 8, dpi = 300)
# }

